I did this a bit flippantly before, but I want to formalize the process by which we estimate the uncertainty on emulator predictions.

The biggest problem is at small scales, and I'm gonna look at those bins individually.

Fixes to try:

  • refit with SJ, but removing the yerr weighting
  • New, simpler kernel?
  • Fit hps with MaxLike and BO in addition to SloppyJoes?
  • Plot sat fraction, consider reducing HOD param space size?

In [51]:
from pearce.emulator import SpicyBuffalo, LemonPepperWet, OriginalRecipe
from pearce.mocks import cat_dict
import numpy as np
from os import path

In [52]:
import matplotlib
#matplotlib.use('Agg')
from matplotlib import pyplot as plt
%matplotlib inline
import seaborn as sns
# Apply seaborn's default plot styling to the matplotlib figures drawn below.
sns.set()

In [53]:
# xi gg data sets: the file the emulator is built from and the file it is
# later scored against.
training_file = ('/scratch/users/swmclau2/xi_zheng07_cosmo_lowmsat/'
                 'PearceRedMagicXiCosmoFixedNd.hdf5')
test_file = ('/scratch/users/swmclau2/xi_zheng07_cosmo_test_lowmsat2/'
             'PearceRedMagicXiCosmoFixedNd_Test.hdf5')
# Earlier setting that pointed test_file at the directory rather than the file:
#   test_file = '/scratch/users/swmclau2/xi_zheng07_cosmo_test_lowmsat2/'
# Disabled xi gm alternative:
#   training_file = '/scratch/users/swmclau2/xi_gm_cosmo/PearceRedMagicXiGMCosmoFixedNd.hdf5'
#   test_file = '/scratch/users/swmclau2/xi_gm_cosmo_test2/PearceRedMagicXiGMCosmoFixedNdTest.hdf5'

In [54]:
# Emulator method (passed to the emulator constructor as `method`) and the
# name of the data-splitting scheme.
em_method, split_method = 'gp', 'random'

In [55]:
# Fix `a` (presumably the scale factor) and derive z = 1/a - 1 from it.
a = 1.0
z = (1.0 / a) - 1.0

In [56]:
# Index of the bin under study; presumably it corresponds to the r value
# fixed below -- TODO confirm.
bin_idx = 0
# Hold z and r fixed. Commented-out variants in the original also fixed
# 'cosmo': 0, or used 'r': 24.06822623.
fixed_params = dict(z=z, r=0.09581734)
# NOTE: hp is reassigned in later cells, so this value is superseded if they run.
hp = np.loadtxt(
    '/home/users/swmclau2/Git/pearce/bin/optimization/'
    'sloppy_joes_result_indiv_bins.npy'
)

In [57]:
from glob import glob

# Load the hyperparameters saved for the single bin `bin_idx`.
# NOTE(review): '%2d' pads with a space (bin 0 -> "..._bin_ 0.npy"); confirm the
# files on disk are named that way rather than zero-padded ('%02d').
hp = np.loadtxt(
    ('/home/users/swmclau2/Git/pearce/bin/optimization/sloppy_joes_indiv_bins/'
     'sloppy_joes_result_indiv_bin_%2d.npy') % bin_idx
)

In [58]:
# Hard-coded hyperparameter vector (25 values). Assigning it here replaces
# whatever value hp held from the np.loadtxt calls in the earlier cells.
# NOTE(review): several entries sit at exactly +/-12 -- possibly optimizer
# bounds; confirm.
hp = np.array([  8.22518016e+00,  -8.48981351e+00,   8.71510289e+00,  -4.00883505e+00,
  -1.20000000e+01,   6.39814872e+00,   2.41769925e+00,   1.28070602e+00,
  -3.23773108e-01,   8.24276778e+00,   1.20000000e+01,  -7.20251694e+00,
  -1.20000000e+01,  -5.17385710e+00,  -4.80026082e-01,  -8.76781990e-01,
  -3.99855599e+00,   1.10634731e+01,  -5.40163410e+00,   1.20000000e+01,
   9.29994915e+00,  -5.05724758e-01,   1.20000000e+01,  -8.49500340e-03,
   7.79086155e+00])

In [59]:
# Parameter names used to label the per-parameter hyperparameters. The first
# row looks cosmological and the second HOD -- presumably; verify against the
# emulator's own ordering.
param_names = [
    'ombh2', 'omch2', 'w0', 'ns', 'ln10As', 'H0', 'Neff',
    'logM0', 'sigma_logM', 'logM1', 'alpha',
]

In [60]:
from collections import defaultdict

# Label every entry of hp: one 'bias' term, then two groups consisting of an
# 'amp' followed by one value per entry of param_names (presumably one group
# per kernel component -- TODO confirm against the emulator's kernel).
pnames = ['bias', 'amp'] + param_names + ['amp'] + param_names

# Collect the values by label. Labels repeat across the two groups, so each
# list holds its values in hp order. These statements were previously
# collapsed onto a single line, which is not valid Python.
metric = defaultdict(list)
for val, pname in zip(hp, pnames):
    metric[pname].append(val)

In [61]:
from collections import defaultdict

# Group the hyperparameter values in hp under their labels in pnames; a label
# that occurs more than once accumulates its values in order.
metric = defaultdict(list)
for pname, val in zip(pnames, hp):
    metric[pname].append(val)

In [62]:
from time import time

# Seed NumPy's global RNG from the wall clock, so any random draws made
# afterwards differ from run to run.
np.random.seed(int(time()))

# Build the emulator from the training file with z and r held fixed, a linear
# mean function, a 0.1 downsample factor and the hyperparameters grouped above.
emu = OriginalRecipe(
    training_file,
    method=em_method,
    fixed_params=fixed_params,
    custom_mean_function='linear',
    downsample_factor=0.1,
    hyperparams=dict(metric=metric),
)


/home/users/swmclau2/.local/lib/python2.7/site-packages/pearce/emulator/emu.py:294: UserWarning: WARNING: NaN detected. Skipped 19 points in training data.
  warnings.warn('WARNING: NaN detected. Skipped %d points in training data.' % (num_skipped))
# NOTE(review): this call follows the cell's recorded output rather than an
# input prompt, so it may come from a disabled cell -- confirm whether it
# is meant to run.
emu.train_metric()

In [ ]:
# Shape of the emulator's downsampled inputs (recorded output: (3998, 11)).
emu.downsample_x.shape


Out[ ]:
(3998, 11)

In [ ]:
# Presumably the emulator predictions and matching true values for the test
# file; statistic=None likely returns them raw instead of a summary -- TODO confirm.
pred_y, data_y = emu.goodness_of_fit(test_file, statistic = None)

In [ ]:
# Read the test file through the emulator with the same fixed parameters;
# the fourth return value is discarded.
test_x, test_y, test_cov, _ = emu.get_data(test_file, emu.fixed_params)

# Pass the test inputs through the emulator's private _whiten helper;
# old_idxs is not used in the cells shown here.
t, old_idxs  = emu._whiten(test_x)

In [ ]:
# Map each emulator parameter name to its value in the first test row.
params = dict(zip(emu.get_param_names(), test_x[0,:]))

# Sanity check: print a direct emulate() result next to the first test value
# and the first entries returned by goodness_of_fit.
print emu.emulate(params)[0], test_y[0], data_y[0], pred_y[0]
# NOTE(review): these are named train_* but are read from test_file -- confirm
# whether training_file was intended. None of them is used in the cells shown.
train_x, train_y, train_err, info = emu.get_data(test_file, emu.fixed_params)

In [ ]:
# Evaluate the emulator's mean function at t, the whitened test inputs.
mean_func_at_params = emu.mean_function(t)

In [ ]:
# RMS of the fractional residual (pred_y - data_y) / data_y, computed on the
# values as returned (apparently log10, since a later cell applies 10** to them).
print np.sqrt(np.mean(np.square((pred_y-data_y)/data_y)))

In [ ]:
# Raise 10 to the stored values (apparently undoing a log10 scaling), then
# take prediction minus truth.
datamat_flat = 10**data_y
resmat_flat = 10**pred_y - datamat_flat

In [ ]:
# Alias of the whitened test inputs, used as the plot coordinates later on.
t_bin = t
# Absolute residual divided by the true value, per test point.
acc_bin = np.abs(resmat_flat)/datamat_flat

In [ ]:
# RMS and mean of the absolute fractional errors in acc_bin.
print np.sqrt(np.mean(np.square(acc_bin)))
print np.mean(acc_bin)

In [ ]:
# Rank each error by percentile: the 0th..100th percentiles of acc_bin serve
# as bin edges and np.digitize reports which bin every error falls in.
percentiles = np.percentile(acc_bin, np.arange(101))
norm_acc_bin = np.digitize(acc_bin, bins=percentiles)
# Alternative kept from the original (linear scaling instead of percentile rank):
#norm_acc_bin = 100*((acc_bin - acc_bin.min())/acc_bin.max()).astype(int)

In [ ]:
# Diverging seaborn colour map between hues 220 and 20, later passed as the
# cmap of the scatter plots coloured by norm_acc_bin.
palette = sns.diverging_palette(220, 20, n=len(percentiles)-1, as_cmap=True)
#sns.set_palette(palette)

In [ ]:
from itertools import combinations

# NOTE: this rebinds pnames (earlier the hyperparameter labels) to the
# emulator's input parameter names, which label the plot axes below.
pnames = emu.get_param_names()

# One scatter plot per unordered pair of input columns 7..10 (presumably the
# HOD parameters -- verify against pnames), coloured by error percentile.
# combinations() yields the pairs in the same order as the original nested
# loops, which had been collapsed onto a single, unparseable line.
for axes1, axes2 in combinations(range(7, 11), 2):
    cbar = plt.scatter(t_bin[:, axes1], t_bin[:, axes2],
                       c=norm_acc_bin, cmap=palette, alpha=0.2)
    plt.colorbar(cbar)
    plt.xlabel(pnames[axes1])
    plt.ylabel(pnames[axes2])
    #plt.gray()
    plt.show()

In [ ]:


In [ ]:


In [ ]: